Process raw WAV files

This notebook clips every WAV file in the specified 'directory' to the specified 'clip_duration'. Each clip starts at a time computed from the timestamp in the WAV filename and the specified 'watch_offset'.

Import statements


In [1]:
import os.path
from os import listdir
import wave
from audioop import ratecv
import datetime
import time

Variable declarations

directory – path to directory containing WAV files to be processed


In [2]:
# Placeholder only: the "set directories" cell assigns the real path.
directory = ''

In [2]:
# Length of every output clip. clip_WAV_file() copies
# clip_duration.total_seconds() * framerate frames from each recording.
clip_duration = datetime.timedelta(minutes=5)

In [3]:
# Amount that determine_clip_offset() subtracts from the (time-shifted)
# filename timestamp to obtain watch_start_time.
watch_offset = datetime.timedelta(minutes=2, seconds=25)
# Placeholder value. determine_clip_offset() overwrites this global with a
# datetime on every call, and clip_WAV_file() writes it to the log.
watch_start_time = ''

In [4]:
# Amount that determine_clip_offset() adds to the filename timestamp before
# subtracting watch_offset. When non-zero, clip_WAV_file() logs a note that
# describes it as a daylight-savings shift.
time_shift = datetime.timedelta(hours=0)

In [84]:
# Optional fixed start time for every clip.
# When 'enabled' is True, determine_clip_offset() starts the clip at
# hour:minute:second on the date of the computed watch start time;
# otherwise it starts at the next 5-minute mark.
specific_start_time = {
    'enabled': True,
    'hour': 16,
    'minute': 45,
    'second': 10,
}

In [56]:
# set directories
# NOTE: absolute, machine-specific path; edit before running elsewhere.
directory = '/Volumes/jPassport/alpine soundscapes/field recordings/2016-03-07/'
# Later cells open './clipped/...' relative paths, so the working directory
# has to be the recording directory.
os.chdir(directory)
clip_directory = os.path.join(directory, 'clipped/')
# Create the output directory only when it is missing. Removing it first with
# os.rmdir() raises OSError as soon as it holds clips or logs from an earlier
# run, and the cliplog numbering relies on those earlier logs still existing.
if not os.path.isdir(clip_directory):
    os.mkdir(clip_directory)

In [87]:
# find WAV files in directory
# Name of the single recording to process; set to None to process every
# WAV file found in `directory`.
selected_WAV_file = '160301-164729.WAV'
# os.path.splitext() looks at the real (last) extension and does not raise on
# names without a dot. Dot-files (e.g. macOS '._*' companions) are skipped
# because they are not recordings. Sorting makes the processing order stable.
WAV_files = sorted(
    f for f in listdir(directory)
    if os.path.isfile(os.path.join(directory, f))
    and not f.startswith('.')
    and os.path.splitext(f)[1] == '.WAV'
    and (selected_WAV_file is None or f == selected_WAV_file)
)
print(WAV_files)


['160301-164729.WAV']

In [88]:
# clip all WAV files in the specified directory

# Pick the first log file name that is not taken yet, so logs from earlier
# runs are kept: cliplog.txt, cliplog_1.txt, cliplog_2.txt, ...
# The relative path depends on the working directory set in the
# "set directories" cell.
cliplog_path = './clipped/cliplog.txt'
log_count = 0
while os.path.exists(cliplog_path):
    log_count = log_count + 1
    cliplog_path = './clipped/cliplog_{0}.txt'.format(log_count)

# The with-block closes (and flushes) the log even when clipping a file raises,
# so everything logged up to the failure is preserved.
with open(cliplog_path, 'w') as cliplog:
    # Header: when the run happened and which offset was applied.
    printlog(cliplog, 'This operation was performed on %s' % str(datetime.datetime.now()))
    printlog(cliplog, 'The watch offset is set to: %s' % str(watch_offset))
    printlog(cliplog, ' ')

    # List every file that is about to be processed.
    printlog(cliplog, 'Found the following WAV files:')
    printlog(cliplog, ' ')
    for WAV_file in WAV_files:
        printlog(cliplog, WAV_file)
    printlog(cliplog, ' ')

    printlog(cliplog, 'Processing...')
    printlog(cliplog, ' ')
    for WAV_file in WAV_files:
        # determine_clip_offset() also updates the global watch_start_time
        # that clip_WAV_file() writes to the log.
        clip_offset, start_time = determine_clip_offset(WAV_file)
        clip_WAV_file(WAV_file, clip_offset, start_time, cliplog)
        printlog(cliplog, ' ')
    printlog(cliplog, ' ')
    printlog(cliplog, 'Done clipping all WAV files in the specified directory.')


This operation was performed on 2016-05-08 17:06:27.391625
The watch offset is set to: 0:02:25
 
Found the following WAV files:
 
160301-164729.WAV
 
Processing...
 
160301-164729.WAV
watch start: 2016-03-01 16:45:04
start offset: 0:00:06
160301-164729.WAV --> will be clipped as --> 160301-164510.WAV
Clipping . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .  done.
 
 
Done clipping all WAV files in the specified directory.

In [11]:
# open WAV file
def open_WAV_file(path, mode):
    """Open the WAV file at ``path`` and return the ``wave`` object.

    ``mode`` is passed straight to ``wave.open`` ('rb' to read, 'wb' to write).
    The caller is responsible for closing the returned object.
    """
    return wave.open(path, mode)

In [44]:
# determine clip_offset from time in filename

def determine_clip_offset(filename):
    """Work out where inside a recording the clip should start.

    The part of ``filename`` before the first '.' is read as
    ``YYMMDD?HHMMSS`` (the character at index 6 is a separator and is
    ignored; the year is prefixed with '20'). The watch start time is that
    timestamp plus the global ``time_shift`` minus the global
    ``watch_offset``.

    Side effect: stores the watch start time in the global
    ``watch_start_time``.

    Returns:
        (clip_offset, start_time) where ``start_time`` is the datetime at
        which the clip begins and ``clip_offset`` is the timedelta from the
        watch start time to ``start_time``. ``clip_offset`` is negative when
        a fixed start time lies before the watch start time.

    Raises:
        ValueError: if the timestamp fields are not numeric or out of range.
    """
    global watch_start_time

    time_string = filename.split('.')[0]
    machine_start_time = datetime.datetime(
        year=int('20' + time_string[0:2]),
        month=int(time_string[2:4]),
        day=int(time_string[4:6]),
        hour=int(time_string[7:9]),
        minute=int(time_string[9:11]),
        second=int(time_string[11:13]),
    )
    watch_start_time = (machine_start_time + time_shift) - watch_offset

    if specific_start_time['enabled']:
        # Fixed wall-clock start on the same date as the watch start time.
        start_time = watch_start_time.replace(
            hour=specific_start_time['hour'],
            minute=specific_start_time['minute'],
            second=specific_start_time['second'],
        )
    else:
        # Start at the 5-minute mark that follows the current 5-minute slot.
        # Adding a timedelta to the top of the hour (instead of replacing
        # hour/minute fields) rolls over correctly into the next hour and,
        # at 23:55-23:59, into the next day.
        minutes_to_mark = (watch_start_time.minute // 5 + 1) * 5
        top_of_hour = watch_start_time.replace(minute=0, second=0)
        start_time = top_of_hour + datetime.timedelta(minutes=minutes_to_mark)

    clip_offset = start_time - watch_start_time
    return clip_offset, start_time

In [13]:
# clip WAV file

def clip_WAV_file(filename, clip_offset, start_time, cliplog):
    """Copy ``clip_duration`` worth of audio from one recording into a new WAV.

    Reads ``filename`` from the global ``directory`` and writes the clip to
    the global ``clip_directory`` under a name built from ``start_time``
    ('%y%m%d-%H%M%S.WAV'), replacing any existing file of that name.

    Args:
        filename: name of the source WAV file inside ``directory``.
        clip_offset: timedelta from the start of the recording to the start
            of the clip.
        start_time: datetime used to name the output file.
        cliplog: open, writable text file that receives the log lines.

    If the clip would end less than 10 seconds before the end of the
    recording, the offset is moved earlier in 1-second steps and the number
    of steps is logged.

    Raises:
        wave.Error: if the resulting offset is negative, i.e. the clip would
            have to start before the recording does. The output file is not
            created in that case.
    """
    # Local import: only needed for the newline-free progress output, which
    # behaves the same on Python 2 and Python 3 when written this way.
    import sys

    clip_filename = start_time.strftime('%y%m%d-%H%M%S.WAV')
    clip_path = os.path.join(clip_directory, clip_filename)

    waveread = open_WAV_file(os.path.join(directory, filename), 'rb')
    try:
        printlog(cliplog, filename)
        printlog(cliplog, 'watch start: %s' % str(watch_start_time))
        printlog(cliplog, 'start offset: %s' % str(clip_offset))
        printlog(cliplog, filename + ' --> will be clipped as --> ' + clip_filename)

        # getparams() order: nchannels, sampwidth, framerate, nframes,
        # comptype, compname.
        params = waveread.getparams()
        framerate = params[2]

        nframes_read = int(clip_duration.total_seconds()*framerate)
        nreads = 100
        # Integer chunk size; the remainder is added to the last chunk so the
        # clip contains exactly nframes_read frames.
        buffer_size, leftover_frames = divmod(nframes_read, nreads)

        # report if time_shift is enabled
        if time_shift:
            text = 'NOTE: The clip was shifted %s hours forward! (daylight savings)' % str(time_shift)
            printlog(cliplog, text)

        # check if clip offset should be shifted forward
        recording_length = float(params[3])/framerate
        clip_offset_shift = 0
        while (clip_offset.total_seconds() + clip_duration.total_seconds()) > recording_length - 10:
            clip_offset = clip_offset - datetime.timedelta(seconds=1)
            clip_offset_shift = clip_offset_shift + 1
        if clip_offset_shift:
            text = 'NOTE: The clip offset was shifted %i seconds forward!' % clip_offset_shift
            printlog(cliplog, text)

        # A negative offset cannot be seeked to. Fail with a clear message
        # before touching the output file.
        if clip_offset.total_seconds() < 0:
            raise wave.Error(
                'cannot clip %s: clip offset %s lies before the start of the '
                'recording (recording length: %.1f s)'
                % (filename, clip_offset, recording_length))

        startpos = int(clip_offset.total_seconds()*framerate)
        waveread.setpos(startpos)

        if os.path.exists(clip_path):
            os.remove(clip_path)
        wavewrite = open_WAV_file(clip_path, 'wb')
        try:
            wavewrite.setparams((params[0], params[1], framerate, nframes_read, params[4], params[5]))

            # One dot per chunk, all on a single line.
            sys.stdout.write('Clipping')
            for read_index in range(nreads):
                chunk_frames = buffer_size
                if read_index == nreads - 1:
                    chunk_frames += leftover_frames
                wavewrite.writeframes(waveread.readframes(chunk_frames))
                sys.stdout.write(' .')
            sys.stdout.write('  done.\n')
        finally:
            wavewrite.close()
    finally:
        waveread.close()

In [14]:
# print and log text
def printlog(file, text):
    """Echo ``text`` to stdout and append it to ``file`` as one line.

    ``file`` is any object with a ``write`` method that accepts text; a
    newline is written after ``text``.
    """
    print(text)
    file.write(text + '\n')

In [86]:
# check time offset
os.chdir(directory)
# find all WAV files in directory
# os.path.splitext() looks at the real (last) extension and does not raise on
# names without a dot. Dot-files (e.g. macOS '._*' companions) are skipped
# because determine_clip_offset() cannot parse their names. Sorting keeps the
# report in a stable order.
WAV_files = sorted(
    f for f in listdir(directory)
    if os.path.isfile(os.path.join(directory, f))
    and not f.startswith('.')
    and os.path.splitext(f)[1] == '.WAV'
)
# Dry run: report the offset and start time each file would be clipped at,
# without writing anything.
for WAV_file in WAV_files:
    clip_offset, start_time = determine_clip_offset(WAV_file)
    print('%s:' % WAV_file)
    print('offset: %s ' % str(clip_offset))
    print('start : %s ' % str(start_time))
    print('\n')


160301-143144.WAV:
offset: 2:15:51 
start : 2016-03-01 16:45:10 


160301-145158.WAV:
offset: 1:55:37 
start : 2016-03-01 16:45:10 


160301-153127.WAV:
offset: 1:16:08 
start : 2016-03-01 16:45:10 


160301-160124.WAV:
offset: 0:46:11 
start : 2016-03-01 16:45:10 


160301-161202.WAV:
offset: 0:35:33 
start : 2016-03-01 16:45:10 


160301-164729.WAV:
offset: 0:00:06 
start : 2016-03-01 16:45:10